Wisconsin Prognosis

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

The data

dataBreast <- read.csv("~/GitHub/RISKPLOTS/DATA/wpbc.data", header=FALSE)
table(dataBreast$V2)
## 
##   N   R 
## 151  47
rownames(dataBreast) <- dataBreast$V1
dataBreast$V1 <- NULL
dataBreast$status <- 1*(dataBreast$V2=="R")
dataBreast$V2 <- NULL
dataBreast$time <- dataBreast$V3
dataBreast$V3 <- NULL
dataBreast <- sapply(dataBreast,as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
dataBreast <- as.data.frame(dataBreast[complete.cases(dataBreast),])
table(dataBreast$status)
## 
##   0   1 
## 148  46

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataBreast)

[++++++]

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy r.Accuracy
V24 4.69e-02 1.01 1.05 1.08 0.598 0.237
V26 4.72e-03 1.00 1.00 1.01 0.593 0.237
V27 2.42e-04 1.00 1.00 1.00 0.608 0.237
V34 1.19e-02 1.00 1.01 1.02 0.634 0.237
V7 6.05e-08 1.00 1.00 1.00 0.588 0.237
V35 5.06e-06 1.00 1.00 1.00 0.727 0.237
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI
V24 0.598 0.609 0.5 0.609 0.0619 0.437 2.87
V26 0.593 0.598 0.5 0.598 0.0626 0.393 2.77
V27 0.608 0.608 0.5 0.608 0.0563 0.434 2.76
V34 0.634 0.618 0.5 0.618 0.0320 0.471 2.42
V7 0.588 0.595 0.5 0.595 0.0487 0.380 2.30
V35 0.727 0.641 0.5 0.641 0.0289 0.565 2.28
  z.NRI Delta.AUC Frequency
V24 2.67 0.1091 1
V26 2.38 0.0983 1
V27 2.63 0.1084 1
V34 2.85 0.1178 1
V7 2.30 0.0949 1
V35 3.50 0.1412 1

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataBreast)
timeinterval <- 2*mean(subset(dataBreast,status==1)$time)

h0 <- sum(dataBreast$status & dataBreast$time <= timeinterval)
h0 <- h0/sum((dataBreast$time > timeinterval) | (dataBreast$status==1))
pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.323 51.1
rdata <- cbind(dataBreast$status,ppoisGzero(index,h0))
rownames(rdata) <- rownames(dataBreast)

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=dataBreast$time,
                     title="Raw Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

As we can see the Observed probability as well as the Time vs. Events are not calibrated.

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.42042 0.2640 0.1655 1.61e-01 0.497448
RR 2.18301 2.2857 4.3220 2.50e+01 2.307692
RR_LCI 1.30105 1.3037 0.6348 5.22e-02 1.275480
RR_UCI 3.66282 4.0074 29.4258 1.20e+04 4.175246
SEN 0.26087 0.6957 0.9783 1.00e+00 0.152174
SPE 0.89865 0.5608 0.1081 6.76e-02 0.952703
BACC 0.57976 0.6282 0.5432 5.34e-01 0.552438
NetBenefit 0.00578 0.0448 0.0971 1.00e-01 0.000374
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.844 0.618 1.13 0.278
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.02 1.02 0.976 1.08
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.792 0.793 0.786 0.799
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.68 0.678 0.594 0.754
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.637 0.546 0.728
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.261 0.143 0.411
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.418
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 11.608565 on 1 degrees of freedom, p = 0.000656
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 167 34 41.1 1.23 11.6
class=1 27 12 4.9 10.27 11.6

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataBreast,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.25 0.774 40.9

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataBreast$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=dataBreast$time,
                     title="Calibrated Train: Breast",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.3445 0.2113 0.131 1.27e-01 0.49512
RR 2.1830 2.2857 4.322 2.50e+01 2.54422
RR_LCI 1.3011 1.3037 0.635 5.22e-02 1.27021
RR_UCI 3.6628 4.0074 29.426 1.20e+04 5.09603
SEN 0.2609 0.6957 0.978 1.00e+00 0.08696
SPE 0.8986 0.5608 0.108 6.76e-02 0.97973
BACC 0.5798 0.6282 0.543 5.34e-01 0.53334
NetBenefit 0.0212 0.0752 0.130 1.33e-01 0.00546
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.87 0.637 1.16 0.408
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.06 1.06 1.01 1.11
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.962 0.962 0.953 0.97
pander::pander(t(rrAnalysisTrain$c.index$cstatCI),caption="C. Index")
C. Index
mean.C Index median lower upper
0.68 0.682 0.603 0.758
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.637 0.546 0.728
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.261 0.143 0.411
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.343
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 11.608565 on 1 degrees of freedom, p = 0.000656
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 167 34 41.1 1.23 11.6
class=1 27 12 4.9 10.27 11.6

Cross-Validation

Here we use the estimated h0 and timeinterval from the full set

rcv <- randomCV(theData=dataBreast,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.9,
                repetitions=100,
                classSamplingType = "Pro"
         )

.[+++++].[++++].[+++].[++].[++++++].[+++++++++].[-++].[+++].[+++].[++++]10 Tested: 127 Avg. Selected: 4.6 Min Tests: 1 Max Tests: 5 Mean Tests: 1.574803 . MAD: 0.4847067

.[++++++].[++++].[++++++++].[++++++++].[++++++++].[+].[+++++++].[+++++++].[–].[++++]20 Tested: 172 Avg. Selected: 5.3 Min Tests: 1 Max Tests: 6 Mean Tests: 2.325581 . MAD: 0.4774552

.[+++].[+++++].[+++].[+++].[++].[++++].[++++].[+++++].[-+++].[+++++]30 Tested: 187 Avg. Selected: 4.9 Min Tests: 1 Max Tests: 8 Mean Tests: 3.208556 . MAD: 0.4783071

.[++].[+++++].[+].[+++++].[+++].[++++].[++++++].[+++++].[+++].[++++]40 Tested: 191 Avg. Selected: 4.875 Min Tests: 1 Max Tests: 10 Mean Tests: 4.188482 . MAD: 0.4808877

.[++++++].[+++].[++++].[++++].[++++].[+++++++].[++++].[+++++].[+++++].[++++]50 Tested: 193 Avg. Selected: 4.98 Min Tests: 1 Max Tests: 12 Mean Tests: 5.181347 . MAD: 0.4835019

.[++++].[++++++].[++++].[+++++].[+++++].[++++++].[+++++].[++++].[+++].[+++]60 Tested: 194 Avg. Selected: 5.066667 Min Tests: 1 Max Tests: 13 Mean Tests: 6.185567 . MAD: 0.4835778

.[+++++++].[+++++++++].[++++++].[++++++].[+++++++].[+].[++++++++].[+++].[++++].[++++]70 Tested: 194 Avg. Selected: 5.2 Min Tests: 1 Max Tests: 16 Mean Tests: 7.216495 . MAD: 0.484655

.[++++].[++++].[++++].[+++].[+++].[+++].[++].[+++++++].[+++++].[+++++++]80 Tested: 194 Avg. Selected: 5.1625 Min Tests: 2 Max Tests: 16 Mean Tests: 8.247423 . MAD: 0.4841967

.[+++++].[+++++].[+++++].[++].[+++++].[+++++++].[++++].[+++].[+++++++].[++++]90 Tested: 194 Avg. Selected: 5.144444 Min Tests: 2 Max Tests: 17 Mean Tests: 9.278351 . MAD: 0.4839212

.[+++].[+++].[++++].[+++++].[+++++].[++++].[++++++].[+++].[+++++++].[+++]100 Tested: 194 Avg. Selected: 5.16 Min Tests: 2 Max Tests: 18 Mean Tests: 10.30928 . MAD: 0.4843842

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=times,
                     title="Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Cross-Validation Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.343 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.34323 0.2008 0.131 1.13e-01 0.49302
RR 1.40625 2.1967 2.814 1.47e+01 1.95767
RR_LCI 0.78018 1.2343 0.733 3.13e-02 0.89980
RR_UCI 2.53472 3.9096 10.809 6.89e+03 4.25926
SEN 0.21739 0.7174 0.957 1.00e+00 0.08696
SPE 0.85135 0.5203 0.135 4.05e-02 0.96622
BACC 0.53437 0.6188 0.546 5.20e-01 0.52659
NetBenefit -0.00771 0.0781 0.128 1.44e-01 -0.00444
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.849 0.622 1.13 0.307
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.02 1.02 0.973 1.08
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.869 0.869 0.852 0.885
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.649 0.647 0.565 0.728
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.603 0.509 0.696
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.217 0.109 0.364
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.851 0.784 0.904
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.343
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 2.600306 on 1 degrees of freedom, p = 0.106843
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 162 36 39.73 0.351 2.6
class=1 32 10 6.27 2.225 2.6

Calibrating the test results

rdatacv <- cbind(status,prob,times)
calprob <- CalibrationProbPoissonRisk(rdatacv)

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.316 0.979 41.4
timeinterval <- calprob$timeInterval;

rdata <- cbind(status,calprob$prob)


rrAnalysisTest <- RRPlot(rdata,atRate=c(0.90),
                     timetoEvent=times,
                     title="Calibrated Test: Breast",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

### Calibrated Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.3620 0.1970 0.128 1.11e-01 5.01e-01
RR 1.3344 2.1967 2.814 1.47e+01 2.21e+00
RR_LCI 0.6783 1.2343 0.733 3.13e-02 1.05e+00
RR_UCI 2.6251 3.9096 10.809 6.89e+03 4.65e+00
SEN 0.1522 0.7174 0.957 1.00e+00 8.70e-02
SPE 0.8919 0.5203 0.135 4.05e-02 9.73e-01
BACC 0.5220 0.6188 0.546 5.20e-01 5.30e-01
NetBenefit -0.0107 0.0803 0.130 1.46e-01 -8.43e-05
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.876 0.641 1.17 0.407
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.06 1.06 1 1.11
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.883 0.884 0.867 0.9
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.649 0.648 0.568 0.723
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.603 0.509 0.696
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.152 0.0634 0.289
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90%
0.364
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 2.539316 on 1 degrees of freedom, p = 0.111043
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 172 39 42.02 0.217 2.54
class=1 22 7 3.98 2.295 2.54